import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import datetime as dt
import calendar
import plotly.express as px
import plotly.graph_objects as go
# Load the unemployment dataset from a CSV file into a DataFrame.
# NOTE(review): the path is an absolute location on one Windows machine, so
# the notebook will not run elsewhere without editing it.
df=pd.read_csv("C:\\Users\\Rutvik\\Downloads\\Unemployment_Rate_upto_11_2020.csv")
# Display the loaded frame.
df
| Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.740 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.740 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.740 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.740 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.740 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 262 | West Bengal | 30-06-2020 | M | 7.29 | 30726310 | 40.39 | East | 22.9868 | 87.855 |
| 263 | West Bengal | 31-07-2020 | M | 6.83 | 35372506 | 46.17 | East | 22.9868 | 87.855 |
| 264 | West Bengal | 31-08-2020 | M | 14.87 | 33298644 | 47.48 | East | 22.9868 | 87.855 |
| 265 | West Bengal | 30-09-2020 | M | 9.35 | 35707239 | 47.73 | East | 22.9868 | 87.855 |
| 266 | West Bengal | 31-10-2020 | M | 9.98 | 33962549 | 45.63 | East | 22.9868 | 87.855 |
267 rows × 9 columns
# Preview the first five rows (five is the default row count for head()).
df.head()
| Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.74 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.74 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.74 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.74 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.74 |
# Total number of cells in the frame (rows x columns).
df.size
2403
# (row count, column count) of the frame.
df.shape
(267, 9)
# Print column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 267 entries, 0 to 266 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Region 267 non-null object 1 Date 267 non-null object 2 Frequency 267 non-null object 3 Estimated Unemployment Rate (%) 267 non-null float64 4 Estimated Employed 267 non-null int64 5 Estimated Labour Participation Rate (%) 267 non-null float64 6 Region.1 267 non-null object 7 longitude 267 non-null float64 8 latitude 267 non-null float64 dtypes: float64(4), int64(1), object(4) memory usage: 18.9+ KB
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
| Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | longitude | latitude | |
|---|---|---|---|---|---|
| count | 267.000000 | 2.670000e+02 | 267.000000 | 267.000000 | 267.000000 |
| mean | 12.236929 | 1.396211e+07 | 41.681573 | 22.826048 | 80.532425 |
| std | 10.803283 | 1.336632e+07 | 7.845419 | 6.270731 | 5.831738 |
| min | 0.500000 | 1.175420e+05 | 16.770000 | 10.850500 | 71.192400 |
| 25% | 4.845000 | 2.838930e+06 | 37.265000 | 18.112400 | 76.085600 |
| 50% | 9.650000 | 9.732417e+06 | 40.390000 | 23.610200 | 79.019300 |
| 75% | 16.755000 | 2.187869e+07 | 44.055000 | 27.278400 | 85.279900 |
| max | 75.850000 | 5.943376e+07 | 69.690000 | 33.778200 | 92.937600 |
# Confirm that the loaded object is a pandas DataFrame.
type(df)
pandas.core.frame.DataFrame
# Column means. The frame also holds text columns (region, date, frequency),
# so the reduction is limited to numeric columns explicitly rather than
# relying on pandas silently dropping the rest, which is deprecated and
# slated to raise TypeError.
df.mean(numeric_only=True)
C:\Users\Rutvik\AppData\Local\Temp\ipykernel_4624\3698961737.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. df.mean()
Estimated Unemployment Rate (%) 1.223693e+01 Estimated Employed 1.396211e+07 Estimated Labour Participation Rate (%) 4.168157e+01 longitude 2.282605e+01 latitude 8.053242e+01 dtype: float64
# Column variances. The frame also holds text columns, so the reduction is
# limited to numeric columns explicitly rather than relying on pandas silently
# dropping the rest, which is deprecated and slated to raise TypeError.
df.var(numeric_only=True)
C:\Users\Rutvik\AppData\Local\Temp\ipykernel_4624\1568254755.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. df.var()
Estimated Unemployment Rate (%) 1.167109e+02 Estimated Employed 1.786585e+14 Estimated Labour Participation Rate (%) 6.155061e+01 longitude 3.932206e+01 latitude 3.400916e+01 dtype: float64
# Column standard deviations. The frame also holds text columns, so the
# reduction is limited to numeric columns explicitly rather than relying on
# pandas silently dropping the rest, which is deprecated and slated to raise
# TypeError.
df.std(numeric_only=True)
C:\Users\Rutvik\AppData\Local\Temp\ipykernel_4624\3390915376.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. df.std()
Estimated Unemployment Rate (%) 1.080328e+01 Estimated Employed 1.336632e+07 Estimated Labour Participation Rate (%) 7.845419e+00 longitude 6.270731e+00 latitude 5.831738e+00 dtype: float64
# Count the missing values in every column.
df.isna().sum()
Region 0 Date 0 Frequency 0 Estimated Unemployment Rate (%) 0 Estimated Employed 0 Estimated Labour Participation Rate (%) 0 Region.1 0 longitude 0 latitude 0 dtype: int64
# Pairwise correlation between the numeric columns. Text columns are filtered
# out up front so the call does not depend on pandas dropping them implicitly;
# select_dtypes is used because it works on both old and new pandas versions.
df.select_dtypes(include="number").corr()
| Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | longitude | latitude | |
|---|---|---|---|---|---|
| Estimated Unemployment Rate (%) | 1.000000 | -0.245176 | -0.073540 | 0.149976 | -0.023976 |
| Estimated Employed | -0.245176 | 1.000000 | -0.047948 | -0.113664 | -0.119321 |
| Estimated Labour Participation Rate (%) | -0.073540 | -0.047948 | 1.000000 | 0.080372 | 0.397836 |
| longitude | 0.149976 | -0.113664 | 0.080372 | 1.000000 | 0.125895 |
| latitude | -0.023976 | -0.119321 | 0.397836 | 0.125895 | 1.000000 |
# Use the seaborn "whitegrid" look. Matplotlib 3.6 renamed its bundled seaborn
# styles with a "seaborn-v0_8-" prefix, so prefer the new name when it is
# available and fall back to the legacy one on older installations.
whitegrid_style = 'seaborn-v0_8-whitegrid'
if whitegrid_style not in plt.style.available:
    whitegrid_style = 'seaborn-whitegrid'
plt.style.use(whitegrid_style)

# Heatmap of the correlation matrix. Only numeric columns are correlated; the
# text columns are removed explicitly instead of relying on implicit dropping.
plt.figure(figsize=(14, 12))
sns.heatmap(df.select_dtypes(include="number").corr())
plt.show()

#unemployment rate according to different regions of India
# Rename the nine columns positionally to shorter labels: "Region" becomes
# "States" and "Region.1" becomes "Region".
# NOTE(review): in the displayed rows "longitude" holds values such as 15.9 and
# "latitude" 79.7 for Andhra Pradesh; the two labels look swapped in the CSV.
# Confirm before using them for mapping.
df.columns = ["States", "Date", "Frequency",
              "Estimated Unemployment Rate", "Estimated Employed",
              "Estimated Labour Participation Rate", "Region",
              "longitude", "latitude"]

# Distribution of the unemployment rate, coloured by region.
plt.figure(figsize=(10, 8))
plt.title("Indian Unemployment")
sns.histplot(x="Estimated Unemployment Rate", hue="Region", data=df)
plt.show()

# Pairwise scatter/histogram grid of the frame's numeric columns.
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x17b04f95a30>
# Sunburst of unemployment with regions on the inner ring and states on the
# outer ring; wedge sizes come from the "Estimated Unemployment Rate" column.
unemploment = df[["States", "Region", "Estimated Unemployment Rate"]]
# The colour-scale name was misspelled "RdY1Gn" (digit one); the plotly scale
# is "RdYlGn". It only takes effect when a numeric `color` column is supplied.
figure = px.sunburst(unemploment, path=["Region", "States"],
                     values="Estimated Unemployment Rate",
                     width=500, height=500, color_continuous_scale="RdYlGn",
                     title="Unemployment Rate in India")
figure.show()
C:\Users\Rutvik\anaconda3\anaconda\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_all_trees = df_all_trees.append(df_tree, ignore_index=True) C:\Users\Rutvik\anaconda3\anaconda\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
# Number of rows recorded for each state.
df["States"].value_counts()
Andhra Pradesh 10 Assam 10 Uttarakhand 10 Uttar Pradesh 10 Tripura 10 Telangana 10 Tamil Nadu 10 Rajasthan 10 Punjab 10 Puducherry 10 Odisha 10 Meghalaya 10 Maharashtra 10 Madhya Pradesh 10 Kerala 10 Karnataka 10 Jharkhand 10 Himachal Pradesh 10 Haryana 10 Gujarat 10 Goa 10 Delhi 10 Chhattisgarh 10 Bihar 10 West Bengal 10 Jammu & Kashmir 9 Sikkim 8 Name: States, dtype: int64
# Per-state mean of the three estimated indicators, flattened so that "States"
# is an ordinary column usable as a plot axis.
state = df.groupby(["States"])[["Estimated Unemployment Rate", "Estimated Employed", "Estimated Labour Participation Rate"]].mean().reset_index()

# Box plot
# Spread of the unemployment rate per state, using every row of df.
fig = px.box(df, x='States', y='Estimated Unemployment Rate', color='States', title='Unemployment Rate')
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

# average unemployment rate bar plot
# The title previously read "Unemploment"; the spelling is corrected here.
fig = px.bar(state, x='States', y="Estimated Unemployment Rate", color="States", title="Average Unemployment Rate (State)")
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

# Distinct region labels present in the data.
df.Region.unique()
array(['South', 'Northeast', 'East', 'West', 'North'], dtype=object)
# Mean of each estimated indicator per region, flattened so that "Region" is
# an ordinary column usable as a plot axis.
region = (
    df.groupby("Region")[
        ["Estimated Unemployment Rate", "Estimated Employed", "Estimated Labour Participation Rate"]
    ]
    .mean()
    .reset_index()
)

# Scatter matrix of the three indicators over every row, coloured by region.
fig = px.scatter_matrix(
    df,
    dimensions=[
        "Estimated Unemployment Rate",
        "Estimated Employed",
        "Estimated Labour Participation Rate",
    ],
    color="Region",
)
fig.show()

# Bar chart of the regional averages, bars ordered by descending total.
fig = px.bar(
    region,
    x="Region",
    y="Estimated Unemployment Rate",
    color="Region",
    title="Average Unemployment Rate (Region)",
)
fig.update_layout(xaxis=dict(categoryorder="total descending"))
fig.show()

# Mean unemployment rate for every (region, state) pair.
unemployment = (
    df.groupby(["Region", "States"])["Estimated Unemployment Rate"]
    .mean()
    .reset_index()
)
unemployment.head()
| Region | States | Estimated Unemployment Rate | |
|---|---|---|---|
| 0 | East | Bihar | 19.471 |
| 1 | East | Jharkhand | 19.539 |
| 2 | East | Odisha | 6.462 |
| 3 | East | West Bengal | 10.192 |
| 4 | North | Delhi | 18.414 |
# Parse the day-first date strings into timestamps.
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)
# Store the frequency flag as a categorical column.
df["Frequency"] = df["Frequency"].astype("category")
# Calendar month number of each row, plus a copy converted through int().
df["Month"] = df["Date"].dt.month
df["Month_int"] = df["Month"].apply(int)
df.head(10)
| States | Date | Frequency | Estimated Unemployment Rate | Estimated Employed | Estimated Labour Participation Rate | Region | longitude | latitude | Month | Month_int | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 2020-01-31 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.74 | 1 | 1 |
| 1 | Andhra Pradesh | 2020-02-29 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.74 | 2 | 2 |
| 2 | Andhra Pradesh | 2020-03-31 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.74 | 3 | 3 |
| 3 | Andhra Pradesh | 2020-04-30 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.74 | 4 | 4 |
| 4 | Andhra Pradesh | 2020-05-31 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.74 | 5 | 5 |
| 5 | Andhra Pradesh | 2020-06-30 | M | 3.31 | 19805400 | 47.41 | South | 15.9129 | 79.74 | 6 | 6 |
| 6 | Andhra Pradesh | 2020-07-31 | M | 8.34 | 15431615 | 38.91 | South | 15.9129 | 79.74 | 7 | 7 |
| 7 | Andhra Pradesh | 2020-08-31 | M | 6.96 | 15251776 | 37.83 | South | 15.9129 | 79.74 | 8 | 8 |
| 8 | Andhra Pradesh | 2020-09-30 | M | 6.40 | 15220312 | 37.47 | South | 15.9129 | 79.74 | 9 | 9 |
| 9 | Andhra Pradesh | 2020-10-31 | M | 6.59 | 15157557 | 37.34 | South | 15.9129 | 79.74 | 10 | 10 |
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from IPython.display import HTML
import calendar
import datetime as dt
import plotly.io as pio
# Display plotly's template registry (the default template and the available names).
pio.templates
Templates configuration
-----------------------
Default template: 'plotly'
Available templates:
['ggplot2', 'seaborn', 'simple_white', 'plotly',
'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
'ygridoff', 'gridon', 'none']
# Compare each state's mean unemployment rate during the lockdown window with
# its mean over the preceding months.
# The lockdown window is April-July (months 4-7). The baseline is January-March:
# it previously ran through April as well, so April was counted in both windows
# and diluted the comparison.
lock = df[(df['Month_int'] >= 4) & (df['Month_int'] <= 7)]
bf_lock = df[(df['Month_int'] >= 1) & (df['Month_int'] <= 3)]

# Per-state mean rate in each window.
g_lock = lock.groupby('States')['Estimated Unemployment Rate'].mean().reset_index()
g_bf_lock = bf_lock.groupby('States')['Estimated Unemployment Rate'].mean().reset_index()

# Attach the baseline by state name rather than by row position, so the values
# stay matched even when a state has no rows in one of the windows. Such a
# state keeps its row and gets NaN for the baseline.
g_lock = g_lock.merge(
    g_bf_lock.rename(columns={'Estimated Unemployment Rate': 'Unemployment Rate before lockdown'}),
    on='States',
    how='left',
)
g_lock.columns = ['States', 'Unemployment Rate after lockdown', 'Unemployment Rate before lockdown']

# Relative change in percent: (after - before) / before * 100.
# The earlier expression lacked parentheses and evaluated to `after - 1`.
g_lock['percentage change in unemployment'] = (
    (g_lock['Unemployment Rate after lockdown'] - g_lock['Unemployment Rate before lockdown'])
    / g_lock['Unemployment Rate before lockdown']
    * 100
).round(2)

# States ordered from the smallest to the largest change.
plot_per = g_lock.sort_values('percentage change in unemployment')

fig = px.bar(plot_per, x='States', y='percentage change in unemployment', color='percentage change in unemployment',
             title='percentage change in Unemployment in each state after lockdown', template='ggplot2')
fig.show()

# "impact status" is a plain copy of the percentage change, used as the colour
# dimension of the horizontal bar chart.
plot_per['impact status'] = plot_per['percentage change in unemployment']
fig = px.bar(plot_per, y='States', x='percentage change in unemployment', color='impact status',
             title='Impact of lockdown on employment across states', template='ggplot2', height=650)
fig.show()